## ============================================================================
## 09_si_s2_practitioner_effects.R
## ----------------------------------------------------------------------------
## Purpose:   Supplementary analyses for Study 2 (practitioner effects):
##            formal ATE tables, estimates by vignette type, and
##            covariate-adjusted estimates.
##            Corresponds to SI Section S2.3.
##
## Inputs:    combined_data.rds (via 00_setup.R)
## Outputs:   Tables/table_s31.tex,
##            Figures/fig_s15.pdf,
##            Tables/table_s33.tex,
##            Tables/table_s32.tex
## ============================================================================

source("00_setup.R")

## ---- Study 2: Setup -----
## Stack vignette datasets
mine_dat <-
  combined_dat %>%
  select(
    sample,
    admin_response_id,
    y_trust_m_n,
    y_reputation_m_n,
    y_effective_m_n,
    y_sincerity_m_n,
    Z_vignette_a,
    Z_vignette_order,
    Z_vig_first,
    Z_vig_second,
    Z_vig_first_type,
    Z_vig_second_type
  ) %>%
  group_by(sample) %>%
  mutate(
    y_mine_aidx = c(
      y_trust_m_n + y_reputation_m_n + y_effective_m_n +
        y_sincerity_m_n
    ),
    y_mine_aidx_s = glass_delta(Y = y_mine_aidx, Z = Z_vignette_a,
                                reference = "Control"),
    Z_vig_type = "Mining"
  ) %>%
  rename(y_aidx = y_mine_aidx,
         y_aidx_s = y_mine_aidx_s,
         Z = Z_vignette_a,
         Z_vig_order = Z_vignette_order,
         y_trust_n = y_trust_m_n,
         y_reputation_n = y_reputation_m_n,
         y_effective_n = y_effective_m_n,
         y_sincerity_n = y_sincerity_m_n)

ent_dat <-
  combined_dat %>%
  select(
    sample,
    admin_response_id,
    y_trust_e_n,
    y_reputation_e_n,
    y_effective_e_n,
    y_sincerity_e_n,
    Z_vignette_b,
    Z_vignette_order,
    Z_vig_first,
    Z_vig_second,
    Z_vig_first_type,
    Z_vig_second_type
  ) %>%
  group_by(sample) %>%
  mutate(
    y_ent_aidx = c(
      y_trust_e_n + y_reputation_e_n + y_effective_e_n +
        y_sincerity_e_n
    ),
    y_ent_aidx_s = glass_delta(Y = y_ent_aidx, Z = Z_vignette_b,
                               reference = "Control"),
    Z_vig_type = "Entertainment"
  ) %>%
  rename(y_aidx = y_ent_aidx,
         y_aidx_s = y_ent_aidx_s,
         Z = Z_vignette_b,
         Z_vig_order = Z_vignette_order,
         y_trust_n = y_trust_e_n,
         y_reputation_n = y_reputation_e_n,
         y_effective_n = y_effective_e_n,
         y_sincerity_n = y_sincerity_e_n
  )


stack_dat <-
  bind_rows(mine_dat, ent_dat) %>%
  group_by(sample) %>%
  ## Standardize for pooled estimation (i.e., intercept is control mean)
  mutate(y_aidx_s2 = glass_delta(Y = y_aidx, Z = Z,
                                 reference = "Control")) %>%
  ungroup()

### ------ Table S31: Estimated ATEs by sample ----

## Mining vignette
est_mine <-
  stack_dat %>%
  filter(Z_vig_type == "Mining") %>%
  group_by(sample) %>%
  group_modify(~ tidy(
    lm_lin(
      y_aidx_s ~ Z,
      covariates = ~ Z_vig_order,
      data = .x,
      clusters = admin_response_id
    )
  )) %>%
  ungroup() %>%
  mutate(estimator = "Mining")

## Entertainment vignette
est_ent <-
  stack_dat %>%
  filter(Z_vig_type == "Entertainment") %>%
  group_by(sample) %>%
  group_modify(~ tidy(
    lm_lin(
      y_aidx_s ~ Z,
      covariates = ~ Z_vig_order,
      data = .x,
      clusters = admin_response_id
    )
  )) %>%
  ungroup() %>%
  mutate(estimator = "Entertainment")

est_pool <-
  stack_dat %>%
  group_by(sample) %>%
  group_modify(~ tidy(
    lm_lin(
      y_aidx_s ~ Z,
      covariates = ~ Z_vig_order + Z_vig_type,
      data = .x,
      clusters = admin_response_id
    )
  )) %>%
  ungroup() %>%
  mutate(estimator = "Pooled")

vig_summary_unadj <-
  bind_rows(est_mine, est_ent, est_pool) %>%
  filter(term %in% c("ZSubstantive", "ZSymbolic"))  %>%
  mutate(
    conf.low90 =  estimate - qnorm(1 - (0.10) / 2) * std.error,
    conf.high90 = estimate + qnorm(1 - (0.10) / 2) * std.error,
    sample = factor(
      sample,
      levels = c("AU", "US"),
      labels = c("Australia", "United States")
    ),
    Z = factor(
      term,
      levels = rev(c("ZSymbolic", "ZSubstantive")),
      labels = rev(c("Symbolic", "Substantive"))
    ),
    estimator = factor(
      estimator,
      levels = c("Mining", "Pooled", "Entertainment"),
      labels = c("Mining Company", "Pooled", "Film Studio")
    )
  )

vig_summary_unadj %>%
  filter(estimator == "Pooled") %>%
  select(Z, sample, estimate, std.error, p.value, conf.low, conf.high,
         conf.low90, conf.high90) %>%
  mutate(across(where(is.numeric), \(x) round(x, 3)))

## Diffs in Australia (Substantive vs Symbolic):
pooled_au <- vig_summary_unadj %>% filter(estimator == "Pooled", sample == "Australia")
est_sub_au <- pooled_au %>% filter(Z == "Substantive") %>% pull(estimate)
est_sym_au <- pooled_au %>% filter(Z == "Symbolic") %>% pull(estimate)
se_sub_au  <- pooled_au %>% filter(Z == "Substantive") %>% pull(std.error)
se_sym_au  <- pooled_au %>% filter(Z == "Symbolic") %>% pull(std.error)
(diff_au <- est_sym_au - est_sub_au)
(se_au <- sqrt(se_sub_au^2 + se_sym_au^2))
round(c(diff_au - qnorm(1 - (0.05) / 2) * se_au, diff_au + qnorm(1 - (0.05) / 2) * se_au), 2)
round(c(diff_au - qnorm(1 - (0.10) / 2) * se_au, diff_au + qnorm(1 - (0.10) / 2) * se_au), 2)
2 * (1 - pnorm(diff_au / se_au))

## Diffs in U.S. (Substantive vs Symbolic):
pooled_us <- vig_summary_unadj %>% filter(estimator == "Pooled", sample == "United States")
est_sub_us <- pooled_us %>% filter(Z == "Substantive") %>% pull(estimate)
est_sym_us <- pooled_us %>% filter(Z == "Symbolic") %>% pull(estimate)
se_sub_us  <- pooled_us %>% filter(Z == "Substantive") %>% pull(std.error)
se_sym_us  <- pooled_us %>% filter(Z == "Symbolic") %>% pull(std.error)
(diff_us <- est_sym_us - est_sub_us)
(se_us <- sqrt(se_sub_us^2 + se_sym_us^2))
round(c(diff_us - qnorm(1 - (0.05) / 2) * se_us, diff_us + qnorm(1 - (0.05) / 2) * se_us), 2)
round(c(diff_us - qnorm(1 - (0.10) / 2) * se_us, diff_us + qnorm(1 - (0.10) / 2) * se_us), 2)
2 * (1 - pnorm(abs(diff_us) / se_us))

## Export table
est_diff <-
  vig_summary_unadj %>%
  filter(estimator == "Pooled") %>%
  select(Z, sample, estimate, std.error) %>%
  pivot_wider(
    names_from = sample,
    values_from = c(estimate, std.error),
    names_sep = "_"
  ) %>%
  mutate(
    est_diff = `estimate_Australia` - `estimate_United States`,
    se_diff = sqrt(`std.error_Australia`^2 + `std.error_United States`^2),
    ci_lwr_diff = est_diff - qnorm(0.975) * se_diff,
    ci_upr_diff = est_diff + qnorm(0.975) * se_diff,
    p_diff = 2 * pnorm(-abs(est_diff / se_diff)),
    sample = "Difference"
  ) %>%
  select(sample, Z, contains("_diff"))

est_tab <-
  vig_summary_unadj %>%
  filter(estimator == "Pooled") %>%
  mutate(entry = table_entry(est = estimate, se = std.error)) %>%
  select(Z, sample, entry) %>%
  pivot_wider(names_from = sample, values_from = entry) %>%
  left_join(
    est_diff %>%
      mutate(
        diff = table_entry(est = est_diff, se = se_diff),
        diff_ci = paste0("[", sprintf("%.2f", round(ci_lwr_diff, 2)), ", ", sprintf("%.2f", round(ci_upr_diff, 2)), "]"),
        diff_p = case_when(
          p_diff < 0.001 ~ "< 0.001",
          round(p_diff, 3) == 0.001 ~ "0.001",
          p_diff > 0.99 ~ "> 0.99",
          .default = paste(round(p_diff, 2))
        )
      ) %>%
      select(Z, diff, diff_ci, diff_p),
    by = "Z"
  ) %>%
  arrange(desc(Z))

est_tab %>%
  xtable(., ) %>%
  print(include.rownames = FALSE,
        include.colnames = FALSE,
        hline.after = c(),
        only.contents = TRUE,
        type = "latex",
        file = paste0(tab_dir, "table_s31.tex"))



### ------ Figure S15: Estimated ATEs by sample and vignette ----

pheight <- 0.45

p <-
  vig_summary_unadj %>%
  filter(estimator != "Pooled") %>%
  ggplot(data = ., aes(x = estimate, y = Z, fill = sample, color = sample,
                       group = sample)) +
  geom_vline(
    xintercept = 0,
    color = "black",
    linetype = "solid",
    linewidth = 0.2
  ) +
  geom_vline(
    xintercept = -0.2,
    color = "black",
    linetype = "dotted",
    linewidth = 0.2
  ) +
  geom_vline(
    xintercept = 0.2,
    color = "black",
    linetype = "dotted",
    linewidth = 0.2
  ) +
  geom_errorbar(
    aes(xmin = conf.low, xmax = conf.high),
    width = 0,
    position = position_dodgev(height = pheight),
    linewidth = .5,
    show.legend = TRUE,
    orientation = "y"
  ) +
  geom_errorbar(
    aes(xmin = conf.low90, xmax = conf.high90),
    width = 0,
    position = position_dodgev(height = pheight),
    linewidth = 1,
    show.legend = TRUE,
    orientation = "y"
  ) +
  geom_point(
    position = position_dodgev(height = pheight),
    shape = 22,
    size = 3,
    color = "white"
  ) +
  scale_x_continuous(limits = c(-0.25, 0.65), breaks = c(-0.2, 0, 0.20, 0.4, 0.6)) +
  scale_color_manual("", values = c("black", "darkgrey")) +
  scale_fill_manual("", values = c("black", "darkgrey")) +
  facet_col(~ estimator) +
  theme_tufte(base_size = 11) +
  theme(
    axis.ticks.y = element_blank(),
    axis.ticks.x = element_line(linewidth = 0.25),
    strip.text = element_text(size = 11, face = "bold"),
    axis.line.x = element_line(linewidth = 0.25),
    axis.title.x = element_text(size = 10),
    axis.text.y = element_text(size = 11),
    plot.margin = unit(c(
      t = -1,
      r = 0.5,
      b = 0,
      l = -0.8
    ), "lines")
  ) +
  labs(subtitle = "",
       x = "Average treatment effect (in standard units)",
       y = "") +
  theme(
    legend.position = "none",
    legend.margin = margin(0, 0, 0, 0),
    legend.box.spacing = unit(0, "pt"),
    legend.box.margin = margin(0, 0, 0, 0)
  )

g <-
  p +
  geom_label(
    data = p$data %>%
      filter(Z == "Symbolic", estimator == "Film Studio") %>%
      mutate(estimate = case_when(
        sample == "Australia" ~ 0.38,
        sample == "United States" ~ 0.40
      )),
    aes(label = sample),
    position = position_dodgev(height = pheight),
    color = "white",
    size = 2.8,
    show.legend = FALSE
  )

g

ggsave(
  filename = paste0(fig_dir, "fig_s15.pdf"),
  g,
  width = 5.5,
  height = 5
)

### ------ Table S33: Estimated ATEs by sample and vignette ----

## Export table
vig_diffs <-
  vig_summary_unadj %>%
  filter(estimator != "Pooled") %>%
  select(Z, estimator, sample, estimate, std.error) %>%
  pivot_wider(
    names_from = sample,
    values_from = c(estimate, std.error),
    names_sep = "_"
  ) %>%
  mutate(
    est_diff = estimate_Australia - `estimate_United States`,
    se_diff = sqrt(std.error_Australia^2 + `std.error_United States`^2),
    ci_lwr_diff = est_diff - 1.96 * se_diff,
    ci_upr_diff = est_diff + 1.96 * se_diff,
    p_value = 2 * pnorm(-abs(est_diff / se_diff))
  )

est_tab  <-
  vig_diffs %>%
  mutate(
    est_au = table_entry(est = estimate_Australia, se = std.error_Australia),
    est_us = table_entry(est = `estimate_United States`, se = `std.error_United States`),
    est_diff = table_entry(est = est_diff, se = se_diff),
    ci_diff = paste0("[", sprintf("%.2f", round(ci_lwr_diff, 2)), ", ", sprintf("%.2f", round(ci_upr_diff, 2)), "]"),
    p_diff =  case_when(
      p_value < 0.001 ~ "< 0.001",
      round(p_value, 3) == 0.001 ~ "0.001",
      p_value > 0.99 ~ "> 0.99",
      .default = paste(sprintf("%.2f", round(p_value, 2)))
    )
  ) %>%
  select(estimator, Z, est_au, est_us, est_diff, ci_diff, p_diff)



est_tab %>%
  arrange(estimator, desc(Z)) %>%
  xtable(., ) %>%
  print(include.rownames = FALSE,
        include.colnames = FALSE,
        hline.after = c(),
        only.contents = TRUE,
        type = "latex",
        file = paste0(tab_dir, "table_s33.tex"))

### ------ Estimate covariate-adjusted ATEs by sample ------

## NB: geographic variables are x_state_comb for Australian sample and
##     x_region for us sample

us_covs <-
  c(
    "x_edu_college",
    "x_employ_cat",
    "x_gender_f",
    "x_age_cat",
    "x_hhi",
    "x_native",
    "x_pid_comb2",
    "x_region",
    "x_ethnicity"
  )

au_covs <-
  c(
    "x_edu_college",
    "x_employ_cat",
    "x_gender_f",
    "x_age_cat",
    "x_hhi",
    "x_native",
    "x_pid_comb2",
    "x_state_comb",
    "x_ideo_cat"
  )


## Stack vignette datasets
mine_dat <-
  combined_dat %>%
  select(
    sample,
    admin_response_id,
    y_trust_m_n,
    y_reputation_m_n,
    y_effective_m_n,
    y_sincerity_m_n,
    Z_vignette_a,
    Z_vignette_order
  ) %>%
  mutate(
    y_mine_aidx = c(
      y_trust_m_n + y_reputation_m_n + y_effective_m_n +
        y_sincerity_m_n
    ),
    y_mine_aidx_s = glass_delta(Y = y_mine_aidx, Z = Z_vignette_a,
                                reference = "Control"),
    Z_vig_type = "Mining"
  ) %>%
  rename(y_aidx = y_mine_aidx,
         y_aidx_s = y_mine_aidx_s,
         Z = Z_vignette_a,
         Z_vig_order = Z_vignette_order)

ent_dat <-
  combined_dat %>%
  select(
    sample,
    admin_response_id,
    y_trust_e_n,
    y_reputation_e_n,
    y_effective_e_n,
    y_sincerity_e_n,
    Z_vignette_b,
    Z_vignette_order
  ) %>%
  mutate(
    y_ent_aidx = c(
      y_trust_e_n + y_reputation_e_n + y_effective_e_n +
        y_sincerity_e_n
    ),
    y_ent_aidx_s = glass_delta(Y = y_ent_aidx, Z = Z_vignette_b,
                               reference = "Control"),
    Z_vig_type = "Entertainment"
  ) %>%
  rename(y_aidx = y_ent_aidx,
         y_aidx_s = y_ent_aidx_s,
         Z = Z_vignette_b,
         Z_vig_order = Z_vignette_order)

set.seed(123)

## Australia: Substantive v. Control
est_au_sub <-
  bind_rows(mine_dat, ent_dat) %>%
  left_join(combined_dat %>% select(admin_response_id, all_of(au_covs)), by = "admin_response_id") %>%
  filter(Z %in% c("Control", "Substantive"), sample == "AU") %>%
  select(y_aidx_s, Z, Z_vig_order, Z_vig_type, all_of(au_covs)) %>%
  filter(complete.cases(.))

X <-
  model.matrix(as.formula(paste(
    "~ Z_vig_order + Z_vig_type +",
    paste(au_covs, collapse = "+")
  )), data = est_au_sub)

## Specify outcome for this loop
y <- est_au_sub %>% pull("y_aidx_s") %>% as.numeric()

## Binary treatment indicator
W <- as.numeric(est_au_sub$Z == "Substantive")

est_au_sub <-
  ate.randomForest(
    X = X,
    Y = y,
    W = W,
    conf.level = 0.95
  )

## United States: Substantive v. Control
est_us_sub <-
  bind_rows(mine_dat, ent_dat) %>%
  left_join(combined_dat %>% select(admin_response_id, all_of(us_covs)), by = "admin_response_id") %>%
  filter(Z %in% c("Control", "Substantive"), sample == "US") %>%
  select(y_aidx_s, Z, Z_vig_order, Z_vig_type, all_of(us_covs)) %>%
  filter(complete.cases(.))

X <-
  model.matrix(as.formula(paste(
    "~ Z_vig_order + Z_vig_type +",
    paste(us_covs, collapse = "+")
  )), data = est_us_sub)

## Specify outcome for this loop
y <- est_us_sub %>% pull("y_aidx_s") %>% as.numeric()

## Binary treatment indicator
W <- as.numeric(est_us_sub$Z == "Substantive")

est_us_sub <-
  ate.randomForest(
    X = X,
    Y = y,
    W = W,
    conf.level = 0.95
  )

## Australia: Symbolic v. Control
est_au_sym <-
  bind_rows(mine_dat, ent_dat) %>%
  left_join(combined_dat %>% select(admin_response_id, all_of(au_covs)), by = "admin_response_id") %>%
  filter(Z %in% c("Control", "Symbolic"), sample == "AU") %>%
  select(y_aidx_s, Z, Z_vig_order, Z_vig_type, all_of(au_covs)) %>%
  filter(complete.cases(.))

X <-
  model.matrix(as.formula(paste(
    "~ Z_vig_order + Z_vig_type +",
    paste(au_covs, collapse = "+")
  )), data = est_au_sym)

## Specify outcome for this loop
y <- est_au_sym %>% pull("y_aidx_s") %>% as.numeric()

## Binary treatment indicator
W <- as.numeric(est_au_sym$Z == "Symbolic")

est_au_sym <-
  ate.randomForest(
    X = X,
    Y = y,
    W = W,
    conf.level = 0.95
  )


## United States: Symbolic v. Control
est_us_sym <-
  bind_rows(mine_dat, ent_dat) %>%
  left_join(combined_dat %>% select(admin_response_id, all_of(us_covs)), by = "admin_response_id") %>%
  filter(Z %in% c("Control", "Symbolic"), sample == "US") %>%
  select(y_aidx_s, Z, Z_vig_order, Z_vig_type, all_of(us_covs)) %>%
  filter(complete.cases(.))

X <-
  model.matrix(as.formula(paste(
    "~ Z_vig_order + Z_vig_type +",
    paste(us_covs, collapse = "+")
  )), data = est_us_sym)

## Specify outcome for this loop
y <- est_us_sym %>% pull("y_aidx_s") %>% as.numeric()

## Binary treatment indicator
W <- as.numeric(est_us_sym$Z == "Symbolic")

est_us_sym <-
  ate.randomForest(
    X = X,
    Y = y,
    W = W,
    conf.level = 0.95
  )


est_au_adj <-
  bind_rows(
    data.frame(
      estimate = est_au_sub$tau,
      std.error = sqrt(est_au_sub$var),
      conf.low = est_au_sub$conf.int[1],
      conf.high = est_au_sub$conf.int[2],
      Z = "Substantive"
    ),
    data.frame(
      estimate = est_au_sym$tau,
      std.error = sqrt(est_au_sym$var),
      conf.low = est_au_sym$conf.int[1],
      conf.high = est_au_sym$conf.int[2],
      Z = "Symbolic"
    )
  )

est_us_adj <-
  bind_rows(
    data.frame(
      estimate = est_us_sub$tau,
      std.error = sqrt(est_us_sub$var),
      conf.low = est_us_sub$conf.int[1],
      conf.high = est_us_sub$conf.int[2],
      Z = "Substantive"
    ),
    data.frame(
      estimate = est_us_sym$tau,
      std.error = sqrt(est_us_sym$var),
      conf.low = est_us_sym$conf.int[1],
      conf.high = est_us_sym$conf.int[2],
      Z = "Symbolic"
    )
  )

### ------ Table S32: Adjusted v. unadjusted estimates ----- 

### Combined
vig_summary_adj <-
  bind_rows(
    est_au_adj %>% mutate(sample = "Australia"),
    est_us_adj %>% mutate(sample = "United States"),
  ) %>%
  mutate(
    sample = factor(sample, levels = rev(c(
      "United States", "Australia"
    ))),
    estimator = "Adjusted",
    statistic = estimate / std.error,
    p.value = 2 * (1 - pnorm(abs(statistic))),
    Z = factor(Z, levels = c("Substantive", "Symbolic")),
  )


summary_combined <-
  vig_summary_unadj %>%
  filter(estimator == "Pooled") %>%
  mutate(Z = factor(
    term,
    levels = c("ZSubstantive", "ZSymbolic"),
    labels = c("Substantive", "Symbolic")
  )) %>%
  select(estimate, std.error, statistic, p.value, conf.low, conf.high, Z, 
         sample, estimator) %>%
  mutate(estimator = "Unadjusted") %>%
  bind_rows(vig_summary_adj) %>%
  mutate(across(where(is.numeric), \(x) round(x, 3)))


### Export table of results
summary_combined %>%
  mutate(entry = table_entry(est = estimate, se = std.error)) %>%
  select(sample, Z, estimator, entry) %>%
  pivot_wider(names_from = c(estimator, sample),
              values_from = entry) %>%
  arrange(desc(Z)) %>%
  select(
    Z,
    Unadjusted_Australia,
    Adjusted_Australia,
    `Unadjusted_United States`,
    `Adjusted_United States`
  ) %>%
  xtable(., ) %>%
  print(
    include.rownames = FALSE,
    include.colnames = FALSE,
    hline.after = c(),
    only.contents = TRUE,
    type = "latex",
    file = paste0(tab_dir, "table_s32.tex")
  )

## ---- Session Info -----------------------------------------------------------
sessionInfo()
